knitr::opts_chunk$set(echo = TRUE)
# Set working directory
setwd("C:\\Users\\Ran\\OneDrive\\cabi-trip-history-data")
# Remove exisiting data
rm(list=ls(all=TRUE))
# Load libraries
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(scales)
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
##
## here
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
library(ggmap)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Link for Capital Bike Share program in Washington DC https://s3.amazonaws.com/capitalbikeshare-data/index.html
# Public data for Capital Bike Share program in Washington DC
load("FinalDataNew.rdata")
FinalDataNew <- FinalData
head(FinalDataNew)
## TotalDurationMs StartDate EndDate
## 117692 489000 2010-09-15 14:05:00 2010-09-15 14:13:00
## 117691 970000 2010-09-16 10:11:00 2010-09-16 10:27:00
## 117690 14000 2010-09-17 11:00:00 2010-09-17 11:00:00
## 117689 76000 2010-09-17 11:04:00 2010-09-17 11:05:00
## 117688 9000 2010-09-17 11:05:00 2010-09-17 11:06:00
## 117686 7000 2010-09-17 11:19:00 2010-09-17 11:19:00
## StartStationNumber StartStation EndStationNumber
## 117692 31009 27th & Crystal Dr 31009
## 117691 31500 4th & Adams St NE 31500
## 117690 31101 14th & V St NW 31101
## 117689 31101 14th & V St NW 31101
## 117688 31101 14th & V St NW 31101
## 117686 31101 14th & V St NW 31101
## EndStation BikeNumber AccountType
## 117692 27th & Crystal Dr W00294 Casual
## 117691 4th & Adams St NE W00377 Casual
## 117690 14th & V St NW W00824 Registered
## 117689 14th & V St NW W00824 Registered
## 117688 14th & V St NW W00824 Registered
## 117686 14th & V St NW W00824 Registered
dim(FinalDataNew) # the length of the data is 13,623,103
## [1] 16108666 9
ggplot(SummaryDateAndTime, aes(x = hour, y = N, colour = weekday)) +
geom_point(data = weekday_summary, aes(group = weekday)) +
geom_line(data = weekday_summary, aes(group = weekday)) +
scale_x_discrete("hour") +
scale_y_continuous("N") +
theme_minimal() +
ggtitle("Different demands in weekday and weekend.\n") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.title=element_text(size=18))
# Create a dataframe with StartStationNumber and Weekday
StartStationinWeek.df <- data.frame(FinalDataNew$StartStationNumber, SummaryDateAndTime$weekday)
colnames(StartStationinWeek.df) <- c('StartStationNumber', 'SummaryWeekday')
head(StartStationinWeek.df)
## StartStationNumber SummaryWeekday
## 1 31009 Wednesday
## 2 31500 Thursday
## 3 31101 Friday
## 4 31101 Friday
## 5 31101 Friday
## 6 31101 Friday
weekends <- c('Saturday', 'Sunday') #define Sat and Sun as weekend
# If the day is Saturday or Sunday, labels the day as "weekend", other wise labels it as "weekday"
StartStationinWeek.df$SummaryWeekday <- factor((StartStationinWeek.df$SummaryWeekday %in% weekends),
levels=c(TRUE, FALSE), labels=c('weekend', 'weekday'))
colnames(StartStationinWeek.df) <- c('StartStationNumber', 'WeekdayOrWeekend')
head(StartStationinWeek.df)
## StartStationNumber WeekdayOrWeekend
## 1 31009 weekday
## 2 31500 weekday
## 3 31101 weekday
## 4 31101 weekday
## 5 31101 weekday
## 6 31101 weekday
library(plyr)
SummaryStartStationinWeek.df <- count(StartStationinWeek.df, c('StartStationNumber', 'WeekdayOrWeekend'))
head(SummaryStartStationinWeek.df)
## StartStationNumber WeekdayOrWeekend freq
## 1 31000 weekend 2607
## 2 31000 weekday 6765
## 3 31001 weekend 6751
## 4 31001 weekday 15738
## 5 31002 weekend 8331
## 6 31002 weekday 20980
SummaryStationWeekday.df <- subset(SummaryStartStationinWeek.df[c(1,3)],
SummaryStartStationinWeek.df$WeekdayOrWeekend == 'weekday' &
!is.na(SummaryStartStationinWeek.df$StartStationNumber))
SummaryStationWeekend.df <- subset(SummaryStartStationinWeek.df[c(1,3)],
SummaryStartStationinWeek.df$WeekdayOrWeekend == 'weekend' &
!is.na(SummaryStartStationinWeek.df$StartStationNumber))
# Scale the freq in weekday and weekend
SummaryStationWeekday.df$freq <- SummaryStationWeekday.df$freq/5
SummaryStationWeekend.df$freq <- SummaryStationWeekend.df$freq/2
# Sort the Station by freq (from high to low)
SummaryStationWeekday.df <- SummaryStationWeekday.df[order(-SummaryStationWeekday.df$freq),]
SummaryStationWeekend.df <- SummaryStationWeekend.df[order(-SummaryStationWeekend.df$freq),]
# Find the top 5 rental station in weekday
Top5StationInWeekday.df <- SummaryStationWeekday.df[1:5,]
TopStationInWeekday.df <- cbind(Top5StationInWeekday.df[1], 'Weekday')
colnames(TopStationInWeekday.df)[2] <- 'Week'
# Find the top 5 rental station in weekend
Top5StationInWeekend.df <- SummaryStationWeekend.df[1:5,]
TopStationInWeekend.df <- cbind(Top5StationInWeekend.df[1], 'Weekend')
colnames(TopStationInWeekend.df)[2] <- 'Week'
# Merge the top 5 rental staions in weekday and weekend
TopStationInWeek.df <- merge(TopStationInWeekday.df,TopStationInWeekend.df, by = 'StartStationNumber', all = TRUE)
TopStationInWeek.df$Week.x <- as.character(TopStationInWeek.df$Week.x)
# If the station in included in both weekday and weekend stations label it as top stations all week
TopStationInWeek.df$Week.x[!is.na(TopStationInWeek.df$Week.x) & !is.na(TopStationInWeek.df$Week.y)] <- 'AllWeek'
TopStationInWeek.df$Week.x[is.na(TopStationInWeek.df$Week.x)] <- 'Weekend'
TopStationInWeek.df <- TopStationInWeek.df[1:2]
colnames(TopStationInWeek.df)[2] <- 'TopRentalStations'
TopStationInWeek.df
## StartStationNumber TopRentalStations
## 1 31101 Weekend
## 2 31200 AllWeek
## 3 31201 AllWeek
## 4 31229 Weekday
## 5 31623 Weekday
## 6 31241 Weekday
## 7 31247 Weekend
## 8 31258 Weekend
p <- DCMap + geom_point(data=TopStationInWeek.df2, aes(x = long, y = lat, color = TopRentalStations), size=5, alpha=1)
p
# rm(list=ls(all=TRUE)) # remove previous data
load("DemandPredictionDataSet.rdata") # load dataframe for predictive model
head(DemandPredictionDataSet)
## StartStationNumber AccountType year month day weekday hour minute
## 117692 31009 Casual 2010 9 15 Wednesday 14 5
## 117691 31500 Casual 2010 9 16 Thursday 10 11
## 117690 31101 Registered 2010 9 17 Friday 11 0
## 117689 31101 Registered 2010 9 17 Friday 11 4
## 117688 31101 Registered 2010 9 17 Friday 11 5
## 117686 31101 Registered 2010 9 17 Friday 11 19
## EndStationNumber end_year end_month end_day end_weekday end_hour
## 117692 31009 2010 9 15 Wednesday 14
## 117691 31500 2010 9 16 Thursday 10
## 117690 31101 2010 9 17 Friday 11
## 117689 31101 2010 9 17 Friday 11
## 117688 31101 2010 9 17 Friday 11
## 117686 31101 2010 9 17 Friday 11
# Count rental demand in each hour by start station
HourlyDemandPrediction.df <- count(DemandPredictionDataSet,c(1,3,4,5,7))
# Count rental demand in each hour by end station
HourlyDemandPrediction.end.df <- count(DemandPredictionDataSet,c(9,10,11,13,14))
TopStations <- count(DemandPredictionDataSet,c('StartStationNumber'))
TopStations$StartStationNumber <- as.numeric(as.character(TopStations$StartStationNumber))
# Sort the stations by frequency
TopStations <- TopStations[order(-TopStations$freq), ]
# Get top 10 stations
Top10Stations <- head(TopStations$StartStationNumber,10)
Top10Stations
## [1] 31200 31623 31201 31258 31247 31229 31241 31101 31214 31613
# Top X station
X <- 8
StationX <- Top10Stations[X]
# Create a dataset for the rental demand in Station X by hour and weekday
# Get the data only for Station X
StationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$StartStationNumber == StationX)
# Count the rental demand in Station X by hour
StationX.summary <- ddply(StationX.df,.(month,weekday,hour),summarise,N = length(hour))
StationX.summary.detail <- ddply(StationX.df,.(year,month,day,weekday,hour),summarise,N = length(hour))
# Count the returned bike in Station X by hour
EndStationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$EndStationNumber == StationX)
EndStationX.summary.detail <- ddply(EndStationX.df,.(end_year,end_month,end_day,end_weekday,end_hour),
summarise,N = length(end_hour))
colnames(EndStationX.summary.detail)[1:5] <- c('year','month','day','weekday','hour')
# Count the rental demand in Station X by hour and weekday
StationX.DayAndHour <- aggregate(N ~ weekday + hour, data = StationX.summary, FUN = sum)
# Visualize the rental demand in Station X by hour and weekday
ggplot(StationX.DayAndHour, aes(x = as.factor(hour), y = N, colour = weekday)) +
geom_point(data = StationX.DayAndHour, aes(group = weekday)) +
geom_line(data = StationX.DayAndHour, aes(group = weekday)) +
scale_x_discrete("hour") +
scale_y_continuous("N") +
theme_minimal() +
ggtitle(sprintf("Rental Demand in Station %d", Top10Stations[X])) +
theme_minimal() +
theme(plot.title=element_text(size=18))+
theme(plot.title = element_text(hjust = 0.5))
library(plotly)
# Get the rental demand of station X on Mondays at each hour and each month
Demand <- xtabs(N ~ hour + month, data = subset(StationX.summary, StationX.summary$weekday == 'Monday'))
plot_ly(x = 1:12, z = Demand, type = "surface") %>%
layout(scene = list(title = "Rental Demand on Saturdays in Station 31623, x = month, y= hour, z1 = count", xaxis = list(title = "Month"), yaxis = list(title = "Hour"), zaxis = list(title = "Demand")))
# Stations that use 12th polynomial model
Model1Station <- c(31623, 31201, 31258, 31247, 31229, 31214, 31101, 31613)
# Stations that use 10th polynomial model
Model2Station <- c(31200, 31241)
# 12th polynomial model for weekday
weekdayPolyModel1 <- function(input_dataset){
dataset.out.weekday <- input_dataset
dataset.out.weekday$year_p2 <- dataset.out.weekday$year^2
dataset.out.weekday$month_p2 <- dataset.out.weekday$month^2
dataset.out.weekday$hour_p2 <- dataset.out.weekday$hour^2
dataset.out.weekday$hour_p3 <- dataset.out.weekday$hour^3
dataset.out.weekday$hour_p4 <- dataset.out.weekday$hour^4
dataset.out.weekday$hour_p5 <- dataset.out.weekday$hour^5
dataset.out.weekday$hour_p6 <- dataset.out.weekday$hour^6
dataset.out.weekday$hour_p7 <- dataset.out.weekday$hour^7
dataset.out.weekday$hour_p8 <- dataset.out.weekday$hour^8
dataset.out.weekday$hour_p9 <- dataset.out.weekday$hour^9
dataset.out.weekday$hour_p10 <- dataset.out.weekday$hour^10
dataset.out.weekday$hour_p11 <- dataset.out.weekday$hour^11
dataset.out.weekday$hour_p12 <- dataset.out.weekday$hour^12
# Fitting the polynomial regression
regressor.weekday <- lm(formula = out ~ year+year_p2+month+month_p2+hour+hour_p2+hour_p3+hour_p4
+hour_p5+hour_p6+hour_p7+hour_p8+hour_p9+hour_p10+hour_p11+hour_p12,
data = dataset.out.weekday)
# Function returns the regressor for weekday
return(regressor.weekday)
}
# 10th polynomial model for weekday
weekdayPolyModel2 <- function(input_dataset){
dataset.out.weekday <- input_dataset
dataset.out.weekday$year_p2 <- dataset.out.weekday$year^2
dataset.out.weekday$month_p2 <- dataset.out.weekday$month^2
dataset.out.weekday$hour_p2 <- dataset.out.weekday$hour^2
dataset.out.weekday$hour_p3 <- dataset.out.weekday$hour^3
dataset.out.weekday$hour_p4 <- dataset.out.weekday$hour^4
dataset.out.weekday$hour_p5 <- dataset.out.weekday$hour^5
dataset.out.weekday$hour_p6 <- dataset.out.weekday$hour^6
dataset.out.weekday$hour_p7 <- dataset.out.weekday$hour^7
dataset.out.weekday$hour_p8 <- dataset.out.weekday$hour^8
dataset.out.weekday$hour_p9 <- dataset.out.weekday$hour^9
dataset.out.weekday$hour_p10 <- dataset.out.weekday$hour^10
# Fitting the polynomial regression
regressor.weekday <- lm(formula = out ~ year+year_p2+month+month_p2+hour+hour_p2+hour_p3+hour_p4+
hour_p5+hour_p6+hour_p7+hour_p8+hour_p9+hour_p10,
data = dataset.out.weekday)
return(regressor.weekday)
}
# 4th polynomial model for weekend
weekendPolyModel <- function(input_dataset){
dataset.out.weekday <- input_dataset
dataset.out.weekend$year_p2 <- dataset.out.weekend$year^2
dataset.out.weekend$month_p2 <- dataset.out.weekend$month^2
dataset.out.weekend$hour_p2 <- dataset.out.weekend$hour^2
dataset.out.weekend$hour_p3 <- dataset.out.weekend$hour^3
dataset.out.weekend$hour_p4 <- dataset.out.weekend$hour^4
# Fitting the polynomial regression
regressor.weekend <- lm(formula = out ~ year+year_p2+month+month_p2+
hour+hour_p2+hour_p3+hour_p4,
data = dataset.out.weekend)
# Function returns the regressor for weekday
return(regressor.weekend)
}
# Function to select the model for weekday prediction
predictStationXinWeekday <- function(SelectStation){
# If the station number is in Model1 then use Model1 (12th poly)
if (SelectStation %in% Model1Station){
RegressorWeekday = weekdayPolyModel1(dataset.out.weekday)
}
# If the station number is in Model2 then use Model2 (10th poly)
if (SelectStation %in% Model2Station){
RegressorWeekday = weekdayPolyModel2(dataset.out.weekday)
}
return(RegressorWeekday)
}
# Function to use model (4th poly) for weekend prediction
predictStationXinWeekend <- function(SelectStation){
RegressorWeekend = weekendPolyModel(dataset.out.weekend)
return(RegressorWeekend)
}
# Function to create the predicted dataset in weekday
predictedWeekdayDemand <- function(SelectStation,Year,Month,Hour){
# Select the model 1 (12th poly)
if (SelectStation %in% Model1Station){
modeldata <- data.frame(year = Year, year_p2 = Year^2, month = Month, month_p2 = Month^2,
hour = Hour, hour_p2 = Hour^2, hour_p3 = Hour^3, hour_p4 = Hour^4,
hour_p5 = Hour^5, hour_p6 = Hour^6, hour_p7 = Hour^7, hour_p8 = Hour^8,
hour_p9 = Hour^9, hour_p10 = Hour^10, hour_p11 = Hour^11, hour_p12 = Hour^12)
}
# Select the model 2 (10th poly)
if (SelectStation %in% Model2Station){
modeldata <- data.frame(year = Year, year_p2 = Year^2, month = Month, month_p2 = Month^2,
hour = Hour, hour_p2 = Hour^2, hour_p3 = Hour^3, hour_p4 = Hour^4,
hour_p5 = Hour^5, hour_p6 = Hour^6, hour_p7 = Hour^7, hour_p8 = Hour^8,
hour_p9 = Hour^9, hour_p10 = Hour^10)
}
# Create the predicted dataset in weekday
predictedWeekdayData = cbind.data.frame(hour = Hour,
demand = predict(myPolyModelWeekday,newdata = modeldata))
# Return the predicted dataset in weekday
return(predictedWeekdayData)
}
# Function to create the predicted dataset in weekend
predictedWeekendDemand <- function(SelectStation,Year,Month,Hour){
# Use 4th poly model
modeldata <- data.frame(year = Year, year_p2 = Year^2, month = Month, month_p2 = Month^2,
hour = Hour, hour_p2 = Hour^2, hour_p3 = Hour^3, hour_p4 = Hour^4)
# Create the predicted dataset in weekend
predictedWeekendData = cbind.data.frame(hour = Hour,
demand = predict(myPolyModelWeekend,newdata = modeldata))
# Return the predicted dataset in weekend
return(predictedWeekendData)
}
# Select station X
X <- 9
StationX <- Top10Stations[X]
# Create a dataset for the rental demand in Station X by hour and weekday
# Count the rental demand in Station X by hour
StationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$StartStationNumber == StationX)
# Count the rental demand in Station X by hour
StationX.summary <- ddply(StationX.df,.(month,weekday,hour),summarise,N = length(hour))
StationX.summary.detail <- ddply(StationX.df,.(year,month,day,weekday,hour),summarise,N = length(hour))
# Count the returned bike in Station X by hour
EndStationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$EndStationNumber == StationX)
EndStationX.summary.detail <- ddply(EndStationX.df,.(end_year,end_month,end_day,end_weekday,end_hour),
summarise,N = length(end_hour))
colnames(EndStationX.summary.detail)[1:5] <- c('year','month','day','weekday','hour')
StationX.demand.detail <- merge.data.frame(StationX.summary.detail, EndStationX.summary.detail,
by = c('year','month','day','weekday','hour'),
all = TRUE)
StationX.demand.detail[is.na(StationX.demand.detail)] <- 0
colnames(StationX.demand.detail)[6:7] <- c('out','In')
StationX.demand.detail$netout <- StationX.demand.detail$out - StationX.demand.detail$In
dataset.out <- StationX.demand.detail[c(1:5,6)]
# Create the dataset for weekday and weekend
weekend.position <- dataset.out$weekday == 'Saturday' | dataset.out$weekday == 'Sunday'
dataset.out.weekday <- subset(dataset.out, weekend.position == FALSE)
dataset.out.weekend <- subset(dataset.out, weekend.position == TRUE)
PredictYear <- 2015
PredictMonth <- 6
hour_grid <- seq(0, 23, 0.1)
# Predict weekday rental demand in Station X
myPolyModelWeekday <- predictStationXinWeekday(StationX)
summary(myPolyModelWeekday)
##
## Call:
## lm(formula = out ~ year + year_p2 + month + month_p2 + hour +
## hour_p2 + hour_p3 + hour_p4 + hour_p5 + hour_p6 + hour_p7 +
## hour_p8 + hour_p9 + hour_p10 + hour_p11 + hour_p12, data = dataset.out.weekday)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.748 -1.759 -0.326 1.401 16.431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.262e+05 1.788e+04 -35.03 <2e-16 ***
## year 6.221e+02 1.776e+01 35.03 <2e-16 ***
## year_p2 -1.545e-01 4.410e-03 -35.03 <2e-16 ***
## month 8.755e-01 1.895e-02 46.19 <2e-16 ***
## month_p2 -6.302e-02 1.414e-03 -44.57 <2e-16 ***
## hour -1.674e+01 7.354e-01 -22.76 <2e-16 ***
## hour_p2 3.371e+01 1.324e+00 25.46 <2e-16 ***
## hour_p3 -2.509e+01 9.403e-01 -26.68 <2e-16 ***
## hour_p4 9.378e+00 3.547e-01 26.44 <2e-16 ***
## hour_p5 -2.040e+00 8.085e-02 -25.23 <2e-16 ***
## hour_p6 2.799e-01 1.190e-02 23.52 <2e-16 ***
## hour_p7 -2.527e-02 1.170e-03 -21.60 <2e-16 ***
## hour_p8 1.525e-03 7.758e-05 19.66 <2e-16 ***
## hour_p9 -6.091e-05 3.429e-06 -17.76 <2e-16 ***
## hour_p10 1.546e-06 9.679e-08 15.97 <2e-16 ***
## hour_p11 -2.256e-08 1.578e-09 -14.29 <2e-16 ***
## hour_p12 1.441e-10 1.131e-11 12.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.721 on 33922 degrees of freedom
## Multiple R-squared: 0.3698, Adjusted R-squared: 0.3695
## F-statistic: 1244 on 16 and 33922 DF, p-value: < 2.2e-16
# Get the predicted demand data
predictedWeekdayData = predictedWeekdayDemand(StationX,PredictYear,PredictMonth,hour_grid)
# Get the actual deamnd data in the month and year
PointToPlot1 <- dataset.out.weekday$year == PredictYear & dataset.out.weekday$month == PredictMonth
plotData <- dataset.out.weekday[PointToPlot1, c(5,6)]
# Plot the boxplot of the actual data and lineplot of the predicted data
plotWeekday <- ggplot()+
geom_boxplot(data = plotData,aes(x=hour,y =out, group = hour))+
geom_line(data = predictedWeekdayData, aes(x=hour, y=demand), colour = 'blue',group = 1) +
ggtitle(paste("Weekday Rental Demand at Station ",StationX, " in ",PredictYear, "-", PredictMonth, sep = "")) +
xlab('Hour') +
ylab('Demand')+
theme(plot.title = element_text(hjust = 0.5))
plotWeekday
#Predict weekend rental demand in Station X
myPolyModelWeekend <- predictStationXinWeekend(StationX)
summary(myPolyModelWeekend)
##
## Call:
## lm(formula = out ~ year + year_p2 + month + month_p2 + hour +
## hour_p2 + hour_p3 + hour_p4, data = dataset.out.weekend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6311 -1.8848 -0.2882 1.5234 17.0175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.382e+05 3.002e+04 -27.92 <2e-16 ***
## year 8.326e+02 2.982e+01 27.92 <2e-16 ***
## year_p2 -2.068e-01 7.406e-03 -27.92 <2e-16 ***
## month 1.069e+00 3.157e-02 33.86 <2e-16 ***
## month_p2 -7.858e-02 2.365e-03 -33.23 <2e-16 ***
## hour -2.085e+00 6.590e-02 -31.64 <2e-16 ***
## hour_p2 4.635e-01 1.198e-02 38.70 <2e-16 ***
## hour_p3 -2.786e-02 7.766e-04 -35.88 <2e-16 ***
## hour_p4 5.029e-04 1.649e-05 30.50 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.924 on 14129 degrees of freedom
## Multiple R-squared: 0.4336, Adjusted R-squared: 0.4333
## F-statistic: 1352 on 8 and 14129 DF, p-value: < 2.2e-16
# Get the predicted demand data
predictedWeekendData = predictedWeekendDemand(StationX,PredictYear,PredictMonth,hour_grid)
# Get the actual deamnd data in the month and year
PointToPlot1 <- dataset.out.weekend$year == PredictYear & dataset.out.weekend$month == PredictMonth
plotData <- dataset.out.weekend[PointToPlot1, c(5,6)]
# Plot the boxplot of the actual data and lineplot of the predicted data
plotWeekend <- ggplot()+
geom_boxplot(data = plotData,aes(x=hour,y =out, group = hour))+
geom_line(data = predictedWeekendData, aes(x=hour, y=demand), colour = 'blue',group = 1) +
ggtitle(paste("Weekend Rental Demand at Station ",StationX, " in ",PredictYear, "-", PredictMonth, sep = "")) +
xlab('Hour') +
ylab('Demand')+
theme(plot.title = element_text(hjust = 0.5))
plotWeekend